Chapter 5 Community composition

# Load the saved R objects this chapter works on.
# NOTE(review): presumably provides genome_counts_filt, genome_metadata,
# sample_metadata and phylum_colors, which are used below without being
# defined in this chapter -- confirm against the script that writes data.Rdata.
load("data/data.Rdata")
# Alternative source for sample_metadata, currently disabled.
#sample_metadata <- read_csv("data/sample_metadata.csv")

5.1 Taxonomy overview

5.1.1 Stacked barplot

# Stacked barplot of per-sample relative abundances, coloured by phylum.
# Every non-zero genome/sample value becomes one bar segment; segments are
# stacked per sample and the samples are split into nested facets.
genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide each sample column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # filter 0 counts
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) + # grouping enables keeping the same sorting of taxonomic units
    geom_col(colour = "white", linewidth = 0.1) + # plot stacked bars with white borders
    scale_fill_manual(values = phylum_colors) +
    facet_nested(. ~ region + environment + treatment, scales = "free") + # nested facets: region > environment > treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      axis.title.x = element_blank(), # hides the x title, so the x label set in labs() is not drawn
      panel.background = element_blank(),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
    ) +
    labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

Number of bacterial phyla

[1] 13

5.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample.
# The sample metadata columns (region, environment, treatment) are carried in
# the grouping so they remain available in the summarised table.
phylum_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide each sample column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum, region, environment, treatment) %>%
  # .groups = "drop" returns an ungrouped tibble, so later steps do not
  # silently inherit the sample/phylum/region/environment grouping.
  summarise(relabun = sum(count), .groups = "drop")
# Table of mean ± sd relative abundance (in percent) per phylum across samples,
# sorted from the most to the least abundant phylum.
phylum_summary %>%
    group_by(phylum) %>%
    summarise(total_mean = mean(relabun * 100, na.rm = TRUE),
              total_sd = sd(relabun * 100, na.rm = TRUE)) %>%
    # round() drops trailing zeros, so values print as e.g. "12.9" rather than "12.90"
    mutate(total = str_c(round(total_mean, 2), "±", round(total_sd, 2))) %>%
    arrange(desc(total_mean)) %>%
    dplyr::select(phylum, total) %>%
    tt()
tinytable_6990c00seg0ejzm0i5xm
phylum total
p__Bacteroidota 55.63±15.89
p__Bacillota_A 18.37±6.29
p__Pseudomonadota 10.83±12.9
p__Bacillota 5.01±8.39
p__Verrucomicrobiota 4.83±4.78
p__Desulfobacterota 1.93±1.73
p__Fusobacteriota 1.22±2.09
p__Deferribacterota 0.78±1.08
p__Bacillota_C 0.59±0.89
p__Cyanobacteriota 0.45±0.56
p__Bacillota_B 0.17±0.15
p__Elusimicrobiota 0.15±0.42
p__Chlamydiota 0.05±0.1
# Character vector of phyla ordered by decreasing mean relative abundance.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)

# Jitter plot of per-sample relative abundance, one row per phylum.
# Factor levels are the reverse of phylum_arrange; ggplot places the first
# level at the bottom of a discrete y axis, so the first entry of
# phylum_arrange ends up at the top.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(mapping = aes(x = relabun, y = phylum, colour = phylum, group = phylum)) +
  geom_jitter(alpha = 0.5) +
  scale_colour_manual(values = phylum_colors[rev(phylum_arrange)]) + # colours looked up by phylum name
  labs(x = "Relative abundance", y = "Phylum") +
  theme_minimal() +
  theme(legend.position = "none")

5.2 Taxonomy boxplot

5.2.1 Family

# Relative abundance of each family in each sample.
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide each sample column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # .groups = "drop" returns an ungrouped tibble instead of one still grouped by sample
  summarise(relabun = sum(count), .groups = "drop")

# Table of mean and sd relative abundance per family across samples,
# sorted from the most to the least abundant family.
# Values are proportions here; they are not multiplied by 100.
family_summary %>%
    group_by(family) %>%
    summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean)) %>%
    tt()
tinytable_bne0cyc4bzu6mss2zux1
family mean sd
f__Bacteroidaceae 2.583672e-01 1.402038e-01
f__Rikenellaceae 1.497520e-01 7.690553e-02
f__Tannerellaceae 7.391528e-02 4.445621e-02
f__Ruminococcaceae 6.634698e-02 4.600300e-02
f__Lachnospiraceae 4.737236e-02 3.212973e-02
f__Akkermansiaceae 4.592234e-02 4.643987e-02
f__Enterobacteriaceae 3.966049e-02 8.976101e-02
f__Marinifilaceae 3.948347e-02 3.440221e-02
f__Aeromonadaceae 2.888392e-02 4.894755e-02
f__Mycoplasmoidaceae 2.767279e-02 8.415652e-02
f__ 2.201216e-02 1.936504e-02
f__Erysipelotrichaceae 2.124871e-02 1.736934e-02
f__Desulfovibrionaceae 1.925392e-02 1.729129e-02
f__Moraxellaceae 1.291154e-02 2.575427e-02
f__Oscillospiraceae 1.270564e-02 8.459566e-03
f__Clostridiaceae 1.236215e-02 1.829537e-02
f__Fusobacteriaceae 1.223384e-02 2.090091e-02
f__Cellulosilyticaceae 1.113335e-02 1.974877e-02
f__CAG-239 9.360822e-03 1.360659e-02
f__Butyricicoccaceae 8.930175e-03 2.594866e-02
f__Mucispirillaceae 7.841353e-03 1.081069e-02
f__CHK158-818 7.232655e-03 8.140593e-03
f__Anaerovoracaceae 6.035251e-03 8.674362e-03
f__Muribaculaceae 6.002582e-03 6.747350e-03
f__Peptostreptococcaceae 5.317766e-03 1.569151e-02
f__P3 5.228558e-03 8.382358e-03
f__UBA3637 4.852446e-03 9.697163e-03
f__Gastranaerophilaceae 4.273520e-03 5.543289e-03
f__Pumilibacteraceae 2.713899e-03 3.103888e-03
f__UBA932 2.573314e-03 3.835337e-03
f__Anaerotignaceae 2.540732e-03 2.377879e-03
f__Acutalibacteraceae 2.530090e-03 3.199571e-03
f__UBA3830 2.212427e-03 3.375679e-03
f__Chromobacteriaceae 1.858083e-03 8.490568e-03
f__Succinispiraceae 1.777645e-03 2.030364e-03
f__Massilibacillaceae 1.723616e-03 3.670656e-03
f__Pseudomonadaceae 1.506298e-03 2.753367e-03
f__Elusimicrobiaceae 1.488813e-03 4.220278e-03
f__UBA1997 1.435499e-03 4.204248e-03
f__Peptococcaceae 1.433042e-03 1.313017e-03
f__Chitinibacteraceae 1.330971e-03 3.241530e-03
f__Coprobacteraceae 9.066416e-04 1.450932e-03
f__Sedimentibacteraceae 8.685113e-04 1.168186e-03
f__CAG-508 8.538769e-04 4.698702e-03
f__Burkholderiaceae_A 8.306559e-04 2.376298e-03
f__Shewanellaceae 7.349097e-04 2.541624e-03
f__Coprobacillaceae 6.481948e-04 1.468116e-03
f__UBA1820 6.453674e-04 1.039951e-03
f__Xanthobacteraceae 5.704160e-04 2.271298e-03
f__Chlamydiaceae 4.705475e-04 1.047759e-03
f__GCF-1484045 3.968194e-04 2.347615e-03
f__CALVMC01 3.395805e-04 1.778833e-03
f__Borkfalkiaceae 3.253103e-04 5.978903e-04
f__UBA7702 2.752987e-04 6.465242e-04
f__Eubacteriaceae 2.752223e-04 4.951364e-04
f__UBA3700 1.468068e-04 8.685208e-04
f__CALYAR01 1.465184e-04 2.571231e-04
f__Enterococcaceae 9.119125e-05 5.394947e-04
f__UBA660 3.641870e-05 9.107289e-05
# Character vector of families ordered by decreasing summed relative abundance
# across samples (the ranking uses sum(), not mean()).
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(total = sum(relabun)) %>%
  arrange(desc(total)) %>%
  pull(family)

# Per origin
# Jitter plot of per-sample relative abundance for the first 20 families in
# family_arrange, coloured by phylum and faceted by the environment column.
# NOTE(review): the family -> phylum lookup keeps every distinct pair, so a
# family label shared by several phyla (possibly the empty "f__") would
# duplicate its rows in the join -- confirm.
family_summary %>%
  left_join(
    genome_metadata %>% distinct(family, phylum),
    by = join_by(family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  filter(family %in% family_arrange[1:20], relabun > 0) %>%
  mutate(family = factor(family, levels = rev(family_arrange[1:20]))) %>%
  ggplot(mapping = aes(x = relabun, y = family, colour = phylum, group = family)) +
  geom_jitter(alpha = 0.5) +
  # NOTE(review): the 8th colour is dropped by position; the reason is not
  # visible here -- confirm it still matches the phyla being plotted.
  scale_colour_manual(values = phylum_colors[-8]) +
  facet_grid(. ~ environment) +
  labs(x = "Relative abundance", y = "Family", colour = "Phylum") +
  theme_minimal()

5.2.2 Genus

# Relative abundance of each genus in each sample, keeping its phylum.
# Rows whose genus is the bare prefix "g__" are dropped, and the "g__" prefix
# is stripped from the remaining genus names.
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide each sample column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # .groups = "drop" returns an ungrouped tibble instead of one still grouped by sample and phylum
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__") %>%
  mutate(genus = sub("^g__", "", genus))

# Mean and sd relative abundance per genus across samples, sorted from the
# most to the least abundant genus. Also used to order genera in the plot below.
genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean))

# Render the sorted summary as a table.
genus_summary_sort %>%
    tt()
tinytable_gowz8etsznys97a15gx7
genus mean sd
Bacteroides 2.517446e-01 1.390618e-01
Mucinivorans 6.923418e-02 4.752036e-02
Parabacteroides 6.030294e-02 3.859090e-02
Aeromonas 2.888392e-02 4.894755e-02
Odoribacter 2.459781e-02 2.122265e-02
Akkermansia 2.443440e-02 3.399107e-02
Mycoplasma_L 2.305445e-02 8.469675e-02
JADFUS01 2.278804e-02 1.254175e-02
UBA866 2.223665e-02 2.645409e-02
Hafnia 1.703127e-02 8.653435e-02
Alistipes 1.610525e-02 1.296819e-02
Plesiomonas 1.419193e-02 3.093735e-02
Parabacteroides_B 1.361234e-02 1.262429e-02
Acinetobacter 1.291154e-02 2.575427e-02
Dielma 1.165436e-02 1.430759e-02
Cetobacterium 1.110575e-02 2.029156e-02
Clostridium 1.069384e-02 1.723953e-02
Bilophila 1.023454e-02 1.247143e-02
JAIHAL01 9.803405e-03 1.823757e-02
CAJGBR01 9.642751e-03 8.330717e-03
14-2 8.887441e-03 1.959989e-02
Angelakisella 8.653329e-03 7.804514e-03
Gallibacteroides 7.232655e-03 8.140593e-03
Clostridium_Q 6.859035e-03 8.336175e-03
Hydrogenoanaerobacterium 6.381847e-03 6.231932e-03
HGM05232 6.002582e-03 6.747350e-03
Bacteroides_G 5.561445e-03 6.407073e-03
Buttiauxella 4.894942e-03 1.436544e-02
SZUA-378 4.878108e-03 1.470108e-02
Malacoplasma 4.618338e-03 1.047538e-02
Hungatella_A 4.389518e-03 5.344594e-03
Anaerotruncus 4.275112e-03 4.410571e-03
Alistipes_A 4.186772e-03 3.802029e-03
Pseudoflavonifractor 3.942622e-03 3.930292e-03
Intestinimonas 3.873406e-03 3.447851e-03
Tidjanibacter 3.806538e-03 3.328943e-03
Anaerovorax 3.358325e-03 7.507482e-03
Avirikenella 3.116317e-03 4.004750e-03
RGIG3102 3.041666e-03 5.273174e-03
Gallalistipes 3.020969e-03 2.579968e-03
Anaerorhabdus 2.979294e-03 4.502520e-03
Paraclostridium 2.925988e-03 1.536966e-02
UMGS1251 2.753843e-03 4.370592e-03
Egerieousia 2.573314e-03 3.835337e-03
JAGAJR01 2.390996e-03 5.196731e-03
Mobilisporobacter 2.341649e-03 4.066854e-03
UMGS1202 2.027421e-03 1.987383e-03
JAAYQI01 1.902859e-03 2.200810e-03
Craterilacuibacter 1.858083e-03 8.490568e-03
Copranaerobaculum 1.829992e-03 8.237395e-03
Amedibacillus 1.694844e-03 2.379494e-03
Butyricimonas 1.692717e-03 2.006344e-03
JAHHTP01 1.680060e-03 2.017330e-03
Sarcina 1.668308e-03 3.171415e-03
Negativibacillus 1.561698e-03 1.820190e-03
Intestinibacillus 1.537995e-03 1.677494e-03
Rikenella 1.513511e-03 2.463151e-03
Pseudomonas_E 1.506298e-03 2.753367e-03
Ruthenibacterium 1.471374e-03 2.597706e-03
Evtepia 1.454607e-03 1.692671e-03
Romboutsia_D 1.377166e-03 3.495412e-03
Deefgea 1.330971e-03 3.241530e-03
Phocea 1.290108e-03 2.335155e-03
Spyradomonas 1.283614e-03 2.009466e-03
Budvicia 1.267490e-03 6.379230e-03
JAGNZR01 1.128086e-03 3.938589e-03
UBA7488 1.112285e-03 2.162473e-03
RGIG4140 1.069355e-03 6.021280e-03
Aminipila 1.067588e-03 2.200351e-03
WRKB01 1.061160e-03 2.721916e-03
Romboutsia_A 1.014612e-03 1.751435e-03
Serratia_A 9.262594e-04 3.342514e-03
CAKVBE01 9.105039e-04 3.028369e-03
Coprobacter 9.066416e-04 1.450932e-03
RGIG7389 9.031113e-04 1.056226e-03
RGIG8482 8.538769e-04 4.698702e-03
JAEZVV01 8.306559e-04 2.376298e-03
Massiliimalia 8.201689e-04 1.353876e-03
JAJBUQ01 8.132764e-04 1.303446e-03
Robinsoniella 7.893139e-04 1.692794e-03
Bacilliculturomica 7.505217e-04 1.159335e-03
MGBC133411 7.434775e-04 1.077399e-03
Shewanella 7.349097e-04 2.541624e-03
Coprobacillus 6.481948e-04 1.468116e-03
IOR16 6.442482e-04 9.524998e-04
Kluyvera 6.393609e-04 2.904551e-03
UBA1174 5.845069e-04 3.314194e-03
Bradyrhizobium 5.704160e-04 2.271298e-03
HGM16780 5.648527e-04 2.422257e-03
Amedibacterium 5.449099e-04 2.608749e-03
Fimivivens 5.043338e-04 6.426073e-04
Anaerotignum 4.555793e-04 9.568867e-04
Citrobacter 4.246318e-04 1.467049e-03
Muricomes 4.103528e-04 6.526275e-04
51-20 3.322612e-04 1.965684e-03
UBA1794 3.101507e-04 5.552653e-04
JAGPHI01 2.969901e-04 6.792123e-04
Yersinia 2.846062e-04 1.069499e-03
Cryptoclostridium 2.752987e-04 6.465242e-04
Longicatena 2.747360e-04 1.625360e-03
Massilioclostridium 2.709713e-04 6.165792e-04
Dysosmobacter 2.360930e-04 4.680885e-04
CALXSC01 2.222364e-04 7.261421e-04
Hespellia 2.221637e-04 4.563625e-04
Scatenecus 1.939464e-04 1.011852e-03
SIG603 1.919687e-04 3.431537e-04
Faecalimonas 1.919022e-04 4.208517e-04
CAZU01 1.657047e-04 9.803223e-04
Lactonifactor 1.237770e-04 4.738322e-04
Enterococcus 9.119125e-05 5.394947e-04
MGBC107952 3.641870e-05 9.107289e-05
# Character vector of genera ordered by decreasing summed relative abundance
# across samples (the ranking uses sum(), not mean()).
# genus_summary has already dropped the "g__" rows and stripped the "g__"
# prefix, so neither step is repeated here.
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(total = sum(relabun)) %>%
    arrange(desc(total)) %>%
    pull(genus)

#Per pond
# Jitter plot of per-sample relative abundance for every genus, coloured by
# phylum and faceted by the environment column. Genera are ordered on the
# y axis by the reverse of genus_summary_sort (sorted by decreasing mean).
genus_summary %>%
    left_join(sample_metadata, by = join_by(sample)) %>%
    mutate(genus = factor(genus, levels = rev(genus_summary_sort %>% pull(genus)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
        scale_color_manual(values = phylum_colors) +
        geom_jitter(alpha = 0.5) +
        facet_grid(. ~ environment) +
        theme_minimal() +
        labs(y = "Genus", x = "Relative abundance", color = "Phylum") # y axis shows genera, not families